Data Visualization#

Load data#

Hide code cell source
import pandas as pd
import sys
sys.path.append('../')
from utils.plots import *
# Switch Bokeh output to the notebook (the name is expected to come from the
# star import of utils.plots -- TODO confirm).
output_notebook()

file_path = '../data/'
model_name = 'AML Epigenomic Risk'

# Load both result tables (first column as index, rows sorted by index) and
# attach the signature results to the main results column-wise.
df = pd.read_excel(file_path + 'alma_main_results.xlsx', index_col=0).sort_index()
sig_results = pd.read_excel(file_path + 'signature_results.xlsx', index_col=0).sort_index()
df = df.join(sig_results)

# Train / test split, driven by the 'Train-Test' label column.
split_label = df['Train-Test']
df_train = df.loc[split_label == 'Train Sample']
df_test = df.loc[split_label == 'Test Sample']

# Test cohort: keep only the last row of each patient, then leave out the
# 'UF HemBank' samples.
repeated_test_patient = df_test['Patient_ID'].duplicated(keep='last')
df_test = df_test.loc[~repeated_test_patient]
df_test = df_test.loc[~df_test['Clinical Trial'].isin(['UF HemBank'])]

# Prognostic model samples: every row with a recorded 'Vital Status at 5y'.
df_px = df.loc[df['Vital Status at 5y'].notna()]

# Narrow the prognostic samples to three trials and three sample types, then
# keep only the last row of each patient.
px_trials = ['AAML0531', 'AAML1031', 'AAML03P1']
px_sample_types = ['Diagnosis',
                   'Primary Blood Derived Cancer - Bone Marrow',
                   'Primary Blood Derived Cancer - Peripheral Blood']
df_px2 = df_px.loc[df_px['Clinical Trial'].isin(px_trials)]
df_px2 = df_px2.loc[df_px2['Sample Type'].isin(px_sample_types)]
df_px2 = df_px2.loc[~df_px2['Patient_ID'].duplicated(keep='last')]

# Diagnostic model samples: training rows that have a 'WHO 2022 Diagnosis'
# label.
df_dx = df_train.loc[df_train['WHO 2022 Diagnosis'].notna()]

# Leave out this diagnosis class (per the original note, classes with fewer
# than 5 samples are excluded).
excluded_dx_classes = ['AML with t(9;22); BCR::ABL1']
df_dx = df_dx.loc[~df_dx['WHO 2022 Diagnosis'].isin(excluded_dx_classes)]

# Rank the prognostic samples by 'P(Death) at 5y' (ascending) and turn the
# 0-based rank into a fraction of the sample count. The first reset_index
# moves the sample index into a column called 'index' (this relies on the
# index being unnamed); the second one produces the rank column.
ranked_px = df_px.sort_values(by='P(Death) at 5y').reset_index()
df_px_ = ranked_px.reset_index(names=['Percentile']).set_index('index')
df_px_['Percentile'] = df_px_['Percentile'] / len(df_px_)

# Full results table plus the 'Percentile' column (missing for rows that are
# not in df_px).
df2 = df.join(df_px_[['Percentile']])

# from utils.alma_plot import *

# plot_alma(df2, save_html=False)

# from utils.alma_plot2 import *

# df_px_ = df_px.sort_values(by='38CpG-AMLsignature').reset_index().reset_index(names=['Percentile']).set_index('index')
# df_px_['Percentile'] = df_px_['Percentile'] / len(df_px_['Percentile'])
# df3 = df.join(df_px_[['Percentile']])

# plot_alma(df3, save_html=False)
Loading BokehJS ...
# Bring the plotting helpers of the project's utils.alma_plot module into scope.
from utils.alma_plot import *

# Draw the ALMA plot for df2 (the results table with the added 'Percentile'
# column). save_html=False presumably skips writing an HTML export --
# confirm against utils.alma_plot.
plot_alma(df2, save_html=False)

# from utils.alma_plot2 import *

# df_px_ = df_px.sort_values(by='38CpG-AMLsignature').reset_index().reset_index(names=['Percentile']).set_index('index')
# df_px_['Percentile'] = df_px_['Percentile'] / len(df_px_['Percentile'])
# df3 = df.join(df_px_[['Percentile']])

# plot_alma(df3, save_html=False)

Patient Characteristics#

ALMA (unsupervised)#

Hide code cell source
from tableone import TableOne
from datetime import date

# Characteristics summarised for the training samples (df_train).
columns = ['Hematopoietic Entity', 'Age (group years)', 'Sex',
           'Clinical Trial']

# `order` sets the display order of categories per variable. Each variable
# is listed exactly once: a key repeated inside a dict literal silently keeps
# only its last value, so 'FLT3 ITD' carries the value that was actually in
# effect (['Yes']).
mytable_cog = TableOne(df_train.reset_index(), columns,
                       overall=False, missing=False,
                       pval=False, pval_adjust=False,
                       htest_name=True, dip_test=True,
                       tukey_test=True, normal_test=True,
                       order={'Age (group years)': ['0-5', '5-13', '13-39', '39-60'],
                              'MRD 1 Status': ['Positive'],
                              'Risk Group': ['High Risk', 'Standard Risk'],
                              'FLT3 ITD': ['Yes'],
                              'Leucocyte counts (10⁹/L)': ['≥30'],
                              'Age group (years)': ['≥10']})

# Export the table, stamped with today's date, next to the input data.
mytable_cog.to_excel('../data/pt_characteristics_alma_model_' + str(date.today()) + '.xlsx')

# Last expression of the cell: the HTML rendering shown in the notebook.
mytable_cog.tabulate(tablefmt="html")
Hide code cell output
Overall
n 3314
Hematopoietic Entity, n (%) Acute lymphoblastic leukemia (ALL) 700 (28.3)
Acute myeloid leukemia (AML) 1221 (49.4)
Acute promyelocytic leukemia (APL) 31 (1.3)
Mixed phenotype acute leukemia (MPAL) 48 (1.9)
Myelodysplastic syndrome (MDS or MDS-like) 223 (9.0)
Otherwise-Normal (Control) 251 (10.1)
Age (group years), n (%) 0-5 480 (24.1)
5-13 483 (24.2)
13-39 663 (33.2)
39-60 165 (8.3)
60+ 203 (10.2)
Sex, n (%) Female 885 (49.1)
Male 918 (50.9)
Clinical Trial, n (%) AAML03P1 72 (2.2)
AAML0531 628 (18.9)
AAML1031 587 (17.7)
BM normal AAML0531 41 (1.2)
Beat AML Consortium 316 (9.5)
CCG2961 41 (1.2)
CETLAM SMD-09 (MDS-tAML) 166 (5.0)
French GRAALL 2003–2005 141 (4.3)
Japanese AML05 64 (1.9)
NOPHO ALL92-2000 933 (28.2)
TARGET ALL 131 (4.0)
TCGA AML 194 (5.9)

Fine-tuned (supervised) Dx and Px models#

Hide code cell source
columns = ['Age (years)', 'Age group (years)', 'Sex', 'Race or ethnic group',
           'Hispanic or Latino ethnic group', 'MRD 1 Status',
           'Leucocyte counts (10⁹/L)', 'BM leukemic blasts (%)',
           'Risk Group', 'FLT3 ITD', 'Clinical Trial']

# Cast the test cohort's age to float. df_test is a filtered selection of df,
# so the column is replaced by rebinding df_test to a new frame rather than
# by assigning into the selection (which can raise SettingWithCopyWarning).
df_test = df_test.assign(**{'Age (years)': df_test['Age (years)'].astype(float)})

# Stack the three cohorts; the concat key becomes the 'cohort' column used
# for grouping below.
all_cohorts = pd.concat([df_dx, df_px2, df_test],
                        axis=0, keys=['Dx Discovery', 'Px Discovery', 'Validation'],
                        names=['cohort']).reset_index()

# `order` sets the display order of categories per variable. Each variable
# is listed exactly once: a key repeated inside a dict literal silently keeps
# only its last value, so 'FLT3 ITD' carries the value that was actually in
# effect (['Yes']).
mytable_cog = TableOne(all_cohorts, columns,
                       overall=False, missing=False,
                       pval=False, pval_adjust=False,
                       htest_name=True, dip_test=True,
                       tukey_test=True, normal_test=True,
                       order={'Race or ethnic group': ['White', 'Black or African American', 'Asian'],
                              'MRD 1 Status': ['Positive'],
                              'Risk Group': ['High Risk', 'Standard Risk'],
                              'FLT3 ITD': ['Yes'],
                              'Leucocyte counts (10⁹/L)': ['≥30'],
                              'Age group (years)': ['≥10']},
                       groupby='cohort')

# Export the table, stamped with today's date, next to the input data.
mytable_cog.to_excel('../data/pt_characteristics_fine-tuned_models_' + str(date.today()) + '.xlsx')

# Last expression of the cell: the HTML rendering shown in the notebook.
mytable_cog.tabulate(tablefmt="html")
Hide code cell output
Dx Discovery Px Discovery Validation
n 2471 946 200
Age (years), mean (SD) 19.2 (19.7) 9.4 (6.3) 8.8 (6.0)
Age group (years), n (%) ≥10 528 (47.4) 463 (48.9) 95 (48.0)
<10 586 (52.6) 483 (51.1) 103 (52.0)
Sex, n (%) Female 711 (50.5) 468 (49.5) 86 (43.0)
Male 697 (49.5) 478 (50.5) 114 (57.0)
Race or ethnic group, n (%) White 1064 (80.5) 697 (79.1) 142 (71.7)
Black or African American 131 (9.9) 102 (11.6) 32 (16.2)
Asian 65 (4.9) 43 (4.9) 1 (0.5)
American Indian or Alaska Native 7 (0.5) 5 (0.6)
Other 48 (3.6) 28 (3.2) 21 (10.6)
Pacific Islander 7 (0.5) 6 (0.7) 2 (1.0)
Hispanic or Latino ethnic group, n (%) Hispanic or Latino 209 (19.6) 185 (20.2) 25 (12.6)
Not Hispanic or Latino 858 (80.4) 731 (79.8) 173 (87.4)
MRD 1 Status, n (%) Positive 284 (29.6) 260 (31.5) 76 (40.4)
Negative 675 (70.4) 566 (68.5) 112 (59.6)
Leucocyte counts (10⁹/L), n (%) ≥30 579 (52.4) 467 (49.4) 87 (43.7)
<30 526 (47.6) 479 (50.6) 112 (56.3)
BM leukemic blasts (%), mean (SD) 65.7 (24.1) 63.8 (24.5) 60.2 (25.6)
Risk Group, n (%) High Risk 198 (14.2) 129 (13.8) 51 (25.5)
Standard Risk 628 (45.0) 454 (48.7) 86 (43.0)
Low Risk 570 (40.8) 349 (37.4) 63 (31.5)
FLT3 ITD, n (%) Yes 180 (16.2) 165 (17.5) 31 (15.7)
No 932 (83.8) 779 (82.5) 167 (84.3)
Clinical Trial, n (%) AAML03P1 62 (2.5) 36 (3.8)
AAML0531 517 (20.9) 507 (53.6)
AAML1031 495 (20.0) 403 (42.6)
BM normal AAML0531 41 (1.7)
Beat AML Consortium 192 (7.8)
CCG2961 31 (1.3)
CETLAM SMD-09 (MDS-tAML) 166 (6.7)
French GRAALL 2003–2005 141 (5.7)
Japanese AML05 9 (0.4)
NOPHO ALL92-2000 641 (25.9)
TARGET ALL 56 (2.3)
TCGA AML 120 (4.9)
AML02 158 (79.0)
AML08 42 (21.0)

By prognostic group#

Discovery#

AML Epigenomic Risk

Hide code cell source
def pt_characteristics_by_model(df, model_name, traintest = 'discovery'):
    """Tabulate patient characteristics grouped by a model's risk category.

    Builds a TableOne summary of `df` grouped by the `model_name` column
    (p-values and test names included), writes it to
    '../data/pt_characteristics_<model_name>_<traintest>_<today>.xlsx' and
    returns the table rendered by `tabulate` in HTML format.

    Parameters
    ----------
    df : pandas.DataFrame
        Cohort to summarise; must contain the columns listed in `columns`
        below as well as the `model_name` column.
    model_name : str
        Column to group by. It is also used in the export file name and in
        the first header cell of the rendered table.
    traintest : str
        Cohort label used in the export file name and the table header.

    Returns
    -------
    Whatever `TableOne.tabulate(tablefmt="html", ...)` returns.

    Notes
    -----
    The rendered header labels the two group columns 'High' and 'Low'.
    NOTE(review): this presumably matches the order in which TableOne lists
    the groups -- confirm for grouping columns with other category names.
    """
    columns = ['Age (years)', 'Age group (years)', 'Sex', 'Race or ethnic group',
               'Hispanic or Latino ethnic group', 'MRD 1 Status',
               'Leucocyte counts (10⁹/L)', 'BM leukemic blasts (%)',
               'Risk Group', 'Clinical Trial', 'FLT3 ITD', 'Treatment Arm']

    # Display order of categories per variable. Each variable is listed
    # exactly once: a key repeated inside a dict literal silently keeps only
    # its last value, so 'FLT3 ITD' carries the value that was actually in
    # effect (['Yes']).
    category_order = {'Race or ethnic group': ['White', 'Black or African American', 'Asian'],
                      'MRD 1 Status': ['Positive'],
                      'Risk Group': ['High Risk', 'Standard Risk'],
                      'FLT3 ITD': ['Yes'],
                      'Leucocyte counts (10⁹/L)': ['≥30'],
                      'Age group (years)': ['≥10']}

    mytable_cog = TableOne(df, columns,
                           overall=False, missing=False,
                           pval=True, pval_adjust=False,
                           htest_name=True, dip_test=True,
                           tukey_test=True, normal_test=True,
                           order=category_order,
                           groupby=model_name)

    # Export the table, stamped with today's date, next to the input data.
    mytable_cog.to_excel('../data/pt_characteristics_' + model_name + '_' + traintest + '_' + str(date.today()) + '.xlsx')

    headers = [model_name + ' ' + traintest, "", 'High', 'Low', 'p-value', 'Statistical Test']
    return mytable_cog.tabulate(tablefmt="html", headers=headers)


# Discovery cohort, grouped by the model named in `model_name`.
pt_characteristics_by_model(df_px2, model_name, 'Discovery')
Hide code cell output
AML Epigenomic Risk Discovery High Low p-value Statistical Test
n 442 504
Age (years), mean (SD) 8.7 (6.5) 10.0 (6.2) 0.002 Two Sample T-test
Age group (years), n (%) ≥10 200 (45.2) 263 (52.2) 0.039 Chi-squared
<10 242 (54.8) 241 (47.8)
Sex, n (%) Female 215 (48.6) 253 (50.2) 0.680 Chi-squared
Male 227 (51.4) 251 (49.8)
Race or ethnic group, n (%) White 323 (78.2) 374 (79.9) 0.971 Chi-squared (warning: expected count < 5)
Black or African American 52 (12.6) 50 (10.7)
Asian 20 (4.8) 23 (4.9)
American Indian or Alaska Native 2 (0.5) 3 (0.6)
Other 13 (3.1) 15 (3.2)
Pacific Islander 3 (0.7) 3 (0.6)
Hispanic or Latino ethnic group, n (%) Hispanic or Latino 84 (19.5) 101 (20.8) 0.699 Chi-squared
Not Hispanic or Latino 346 (80.5) 385 (79.2)
MRD 1 Status, n (%) Positive 158 (40.9) 102 (23.2) <0.001 Chi-squared
Negative 228 (59.1) 338 (76.8)
Leucocyte counts (10⁹/L), n (%) ≥30 190 (43.0) 277 (55.0) <0.001 Chi-squared
<30 252 (57.0) 227 (45.0)
BM leukemic blasts (%), mean (SD) 65.5 (26.2) 62.2 (22.9) 0.050 Two Sample T-test
Risk Group, n (%) High Risk 90 (20.8) 39 (7.8) <0.001 Chi-squared
Standard Risk 310 (71.8) 144 (28.8)
Low Risk 32 (7.4) 317 (63.4)
Clinical Trial, n (%) AAML03P1 19 (4.3) 17 (3.4) 0.017 Chi-squared
AAML0531 215 (48.6) 292 (57.9)
AAML1031 208 (47.1) 195 (38.7)
FLT3 ITD, n (%) Yes 88 (20.0) 77 (15.3) 0.074 Chi-squared
No 353 (80.0) 426 (84.7)
Treatment Arm, n (%) Arm A 110 (47.0) 148 (48.1) 0.878 Chi-squared
Arm B 124 (53.0) 160 (51.9)

38CpG-AMLsignature-37CpGs

Hide code cell source
# Discovery cohort (df_px2), grouped by the '38CpG-AMLsignature Categorical' column.
pt_characteristics_by_model(df_px2, model_name='38CpG-AMLsignature Categorical', traintest='Discovery')
Hide code cell output
38CpG-AMLsignature Categorical Discovery High Low p-value Statistical Test
n 473 473
Age (years), mean (SD) 8.8 (6.6) 10.0 (6.0) 0.003 Two Sample T-test
Age group (years), n (%) ≥10 215 (45.5) 248 (52.4) 0.037 Chi-squared
<10 258 (54.5) 225 (47.6)
Sex, n (%) Female 241 (51.0) 227 (48.0) 0.398 Chi-squared
Male 232 (49.0) 246 (52.0)
Race or ethnic group, n (%) White 341 (78.0) 356 (80.2) 0.130 Chi-squared (warning: expected count < 5)
Black or African American 59 (13.5) 43 (9.7)
Asian 24 (5.5) 19 (4.3)
American Indian or Alaska Native 2 (0.5) 3 (0.7)
Other 10 (2.3) 18 (4.1)
Pacific Islander 1 (0.2) 5 (1.1)
Hispanic or Latino ethnic group, n (%) Hispanic or Latino 82 (18.0) 103 (22.4) 0.114 Chi-squared
Not Hispanic or Latino 374 (82.0) 357 (77.6)
MRD 1 Status, n (%) Positive 158 (38.7) 102 (24.4) <0.001 Chi-squared
Negative 250 (61.3) 316 (75.6)
Leucocyte counts (10⁹/L), n (%) ≥30 214 (45.2) 253 (53.5) 0.013 Chi-squared
<30 259 (54.8) 220 (46.5)
BM leukemic blasts (%), mean (SD) 66.2 (25.1) 61.4 (23.8) 0.004 Two Sample T-test
Risk Group, n (%) High Risk 85 (18.3) 44 (9.4) <0.001 Chi-squared
Standard Risk 323 (69.6) 131 (28.0)
Low Risk 56 (12.1) 293 (62.6)
Clinical Trial, n (%) AAML03P1 18 (3.8) 18 (3.8) 0.375 Chi-squared
AAML0531 264 (55.8) 243 (51.4)
AAML1031 191 (40.4) 212 (44.8)
FLT3 ITD, n (%) Yes 97 (20.6) 68 (14.4) 0.016 Chi-squared
No 375 (79.4) 404 (85.6)
Treatment Arm, n (%) Arm A 133 (47.3) 125 (47.9) 0.964 Chi-squared
Arm B 148 (52.7) 136 (52.1)

Validation#

AML Epigenomic Risk

Hide code cell source
# Validation cohort (df_test), grouped by the model named in `model_name`.
# The cohort label is capitalised like in the other calls of this helper so
# the export file name and the table header follow one convention.
pt_characteristics_by_model(df_test, model_name, 'Validation')
Hide code cell output
AML Epigenomic Risk validation High Low p-value Statistical Test
n 88 112
Age (years), mean (SD) 7.9 (6.1) 9.4 (5.8) 0.083 Two Sample T-test
Age group (years), n (%) ≥10 35 (40.7) 60 (53.6) 0.098 Chi-squared
<10 51 (59.3) 52 (46.4)
Sex, n (%) Female 38 (43.2) 48 (42.9) 1.000 Chi-squared
Male 50 (56.8) 64 (57.1)
Race or ethnic group, n (%) White 63 (73.3) 79 (70.5) 0.688 Chi-squared (warning: expected count < 5)
Black or African American 14 (16.3) 18 (16.1)
Asian 1 (1.2)
Other 7 (8.1) 14 (12.5)
Pacific Islander 1 (1.2) 1 (0.9)
Hispanic or Latino ethnic group, n (%) Hispanic or Latino 15 (17.4) 10 (8.9) 0.116 Chi-squared
Not Hispanic or Latino 71 (82.6) 102 (91.1)
MRD 1 Status, n (%) Positive 42 (49.4) 34 (33.0) 0.033 Chi-squared
Negative 43 (50.6) 69 (67.0)
Leucocyte counts (10⁹/L), n (%) ≥30 34 (39.1) 53 (47.3) 0.308 Chi-squared
<30 53 (60.9) 59 (52.7)
BM leukemic blasts (%), mean (SD) 67.8 (25.6) 54.1 (24.1) <0.001 Two Sample T-test
Risk Group, n (%) High Risk 31 (35.2) 20 (17.9) <0.001 Chi-squared
Standard Risk 47 (53.4) 39 (34.8)
Low Risk 10 (11.4) 53 (47.3)
Clinical Trial, n (%) AML02 71 (80.7) 87 (77.7) 0.732 Chi-squared
AML08 17 (19.3) 25 (22.3)
FLT3 ITD, n (%) Yes 16 (18.4) 15 (13.5) 0.459 Chi-squared
No 71 (81.6) 96 (86.5)
Treatment Arm, n (%) Arm A 46 (53.5) 60 (53.6) 1.000 Chi-squared
Arm B 40 (46.5) 52 (46.4)

38CpG-AMLsignature-37CpGs

Hide code cell source
# Validation cohort (df_test), grouped by the '38CpG-AMLsignature Categorical' column.
pt_characteristics_by_model(df_test, model_name='38CpG-AMLsignature Categorical', traintest='Validation')
Hide code cell output
38CpG-AMLsignature Categorical Validation High Low p-value Statistical Test
n 111 89
Age (years), mean (SD) 7.9 (6.2) 9.9 (5.6) 0.020 Two Sample T-test
Age group (years), n (%) ≥10 46 (41.8) 49 (55.7) 0.072 Chi-squared
<10 64 (58.2) 39 (44.3)
Sex, n (%) Female 50 (45.0) 36 (40.4) 0.611 Chi-squared
Male 61 (55.0) 53 (59.6)
Race or ethnic group, n (%) White 79 (72.5) 63 (70.8) 0.854 Chi-squared (warning: expected count < 5)
Black or African American 18 (16.5) 14 (15.7)
Asian 1 (0.9)
Other 10 (9.2) 11 (12.4)
Pacific Islander 1 (0.9) 1 (1.1)
Hispanic or Latino ethnic group, n (%) Hispanic or Latino 14 (12.7) 11 (12.5) 1.000 Chi-squared
Not Hispanic or Latino 96 (87.3) 77 (87.5)
MRD 1 Status, n (%) Positive 50 (48.1) 26 (31.0) 0.026 Chi-squared
Negative 54 (51.9) 58 (69.0)
Leucocyte counts (10⁹/L), n (%) ≥30 48 (43.6) 39 (43.8) 1.000 Chi-squared
<30 62 (56.4) 50 (56.2)
BM leukemic blasts (%), mean (SD) 62.3 (27.9) 57.7 (22.5) 0.227 Two Sample T-test
Risk Group, n (%) High Risk 38 (34.2) 13 (14.6) <0.001 Chi-squared
Standard Risk 59 (53.2) 27 (30.3)
Low Risk 14 (12.6) 49 (55.1)
Clinical Trial, n (%) AML02 86 (77.5) 72 (80.9) 0.678 Chi-squared
AML08 25 (22.5) 17 (19.1)
FLT3 ITD, n (%) Yes 20 (18.2) 11 (12.5) 0.370 Chi-squared
No 90 (81.8) 77 (87.5)
Treatment Arm, n (%) Arm A 62 (56.9) 44 (49.4) 0.367 Chi-squared
Arm B 47 (43.1) 45 (50.6)

Kaplan-Meier Plots#

Overall study population#

AML Epigenomic Risk

Hide code cell source
# Kaplan-Meier curves for the whole study population, one figure per cohort,
# stratified by the model named in `model_name`. Confidence intervals, risk
# counts and both save options are switched off.
km_cohorts = (('Discovery', df_px2), ('Validation', df_test))
for trial, dataset in km_cohorts:
    draw_kaplan_meier(df=dataset,
                      model_name=model_name,
                      trialname=trial,
                      figsize=(8, 8),
                      show_ci=False,
                      add_risk_counts=False,
                      save_plot=False,
                      save_survival_table=False)
Hide code cell output
../_images/5dff313c524bc0ca36774bb1371eb90c7691bbb17e265d1f6ae6980222c36bb4.png ../_images/eaae3f8f8a982f9425f9d844224467575bb6b92db20a836626eeb4a01b813286.png

38CpG-AMLsignature

Hide code cell source
# Kaplan-Meier curves for the whole study population, one figure per cohort,
# stratified by the '38CpG-AMLsignature Categorical' column. Confidence
# intervals, risk counts and both save options are switched off.
km_cohorts = (('Discovery', df_px2), ('Validation', df_test))
for trial, dataset in km_cohorts:
    draw_kaplan_meier(df=dataset,
                      model_name='38CpG-AMLsignature Categorical',
                      trialname=trial,
                      figsize=(8, 8),
                      show_ci=False,
                      add_risk_counts=False,
                      save_plot=False,
                      save_survival_table=False)
Hide code cell output
../_images/8bbf7f60f5a2421a73f504bf8049c17c5a1dda6eff5d9e63077c90a4622461b0.png ../_images/45d5bebe157dbc9f0eb8f5a37a7937cbd013fde8411174dd8f281e903a8fc1ca.png

Per risk group#

AML Epigenomic Risk

Hide code cell source
# Kaplan-Meier curves per clinical 'Risk Group' stratum: for each cohort, one
# figure per stratum, stratified by the model named in `model_name`.
risk_groups = ['High Risk', 'Low Risk', 'Standard Risk']
for trial, dataset in (('Discovery', df_px2), ('Validation', df_test)):
    for risk_group in risk_groups:
        stratum = dataset[dataset['Risk Group'] == risk_group]
        draw_kaplan_meier(df=stratum,
                          model_name=model_name,
                          trialname=f'{trial} {risk_group}',
                          figsize=(8, 8),
                          add_risk_counts=False,
                          save_plot=False,
                          save_survival_table=False)
Hide code cell output
../_images/159f4d27b8683830271bd04536cfb6b3bee87ab35af3288e3726696c2a9496fc.png ../_images/1ce3afc9eb4e9d52695bf509a77c01929b60a59848eec91bf2e79be9fc610589.png ../_images/7951e4da64ef2770c0d2f87f44d1cd2644092015cb35109ed2fcaab17f536736.png ../_images/a3fb2d2df2d7c911bd2c083e454ae87344723a52e8600b84125a7c162ef27cf9.png ../_images/94b3be4932c6b63bc61ccd1bb6837e098136e680f04124a51c32a08fe2a1710c.png ../_images/025505c1f3389633537b92cd16b50b07b1d2c244bb979f128003c3b069023a8a.png

38CpG-AMLsignature-37CpGs

Hide code cell source
# Kaplan-Meier curves per clinical 'Risk Group' stratum: for each cohort, one
# figure per stratum, stratified by the '38CpG-AMLsignature Categorical'
# column.
risk_groups = ['High Risk', 'Low Risk', 'Standard Risk']
for trial, dataset in (('Discovery', df_px2), ('Validation', df_test)):
    for risk_group in risk_groups:
        stratum = dataset[dataset['Risk Group'] == risk_group]
        draw_kaplan_meier(df=stratum,
                          model_name='38CpG-AMLsignature Categorical',
                          trialname=f'{trial} {risk_group}',
                          figsize=(8, 8),
                          add_risk_counts=False,
                          save_plot=False,
                          save_survival_table=False)
Hide code cell output
../_images/50792051d53e0475e76566fa97dec9d1988216c5b77b2d1a736bb009aab992d2.png ../_images/eaf91beb83a633b616cf6c6794ab0154ee2906b28bd783ab8333dfab3367f566.png ../_images/e3baba426f913c4addb607d38f70b3c98ae4bda62d8dfbba036f26e2ad1ec98d.png ../_images/a4d1a243a71630351b85793703f3e4c47fbc52796d38ee47fb1bb4c70f5fc563.png ../_images/ed5e24f7f4dcf7e4e70b1fd147f4f1bc18776ba51f70f4d1fe423de7821bd268.png ../_images/fbfad7acc37cf07c4b989df1b857137e868169b7271ec5b8e76a2c200a535ab0.png

Per risk group (AAML1831 COG)#

AML Epigenomic Risk

Hide code cell source
# Kaplan-Meier curves per 'Risk Group AAML1831' stratum, discovery cohort
# only: one figure per stratum, stratified by the model named in
# `model_name`.
dataset, trial = df_px2, 'Discovery'
risk_groups = ['High', 'Low', 'Standard']
for risk_group in risk_groups:
    stratum = dataset[dataset['Risk Group AAML1831'] == risk_group]
    draw_kaplan_meier(df=stratum,
                      model_name=model_name,
                      trialname=f'{trial} {risk_group} Risk',
                      figsize=(8, 8),
                      add_risk_counts=False,
                      save_plot=False,
                      save_survival_table=False)
Hide code cell output
../_images/79d6be65de7b1a48fa5c1d502c99c2aa7434ba9d685acf61a86a4438e78e71e8.png ../_images/36dd2a4044f526e8b8657a46ee26e9e15438f40cea30daa9d2f1c659187b58d2.png ../_images/32b429aebfab1bb02b3de6b7bbe700d2e7c0b335083715dd44f30b9a8f7b0480.png

38CpG-AMLsignature-37CpGs

Hide code cell source
# Kaplan-Meier curves per 'Risk Group AAML1831' stratum, discovery cohort
# only: one figure per stratum, stratified by the
# '38CpG-AMLsignature Categorical' column.
dataset, trial = df_px2, 'Discovery'
risk_groups = ['High', 'Low', 'Standard']
for risk_group in risk_groups:
    stratum = dataset[dataset['Risk Group AAML1831'] == risk_group]
    draw_kaplan_meier(df=stratum,
                      model_name='38CpG-AMLsignature Categorical',
                      trialname=f'{trial} {risk_group} Risk',
                      figsize=(8, 8),
                      add_risk_counts=False,
                      save_plot=False,
                      save_survival_table=False)
Hide code cell output
../_images/449828759edd8634aa5946823405a3d73a77bc9ae459e82fd25f555d41b1e573.png ../_images/16642e83b8518d9bfacade8dcae4db1f1357b2dc6a532a10ace1e51a19d2f76f.png ../_images/2e9eee6de6f4e11eb51ae4d2f1190c9c530b655e52f5c6552a6575d4227cdf4c.png

Forest Plots#

With MRD 1 and BM blast (%)#

AML Epigenomic Risk

Hide code cell source
# Forest plots for the AML Epigenomic Risk call: per cohort, one plot for
# overall survival and one for event-free survival at 5 years.

# Alias -> source column. The aliases avoid spaces and dots, presumably
# because the plotting helper needs identifier-like names -- TODO confirm.
forest_aliases = {
    'AML_Epigenomic_Risk': 'AML Epigenomic Risk',
    'MethylScoreAML_Categorical': '38CpG-AMLsignature Categorical',
    'os_time_5y': 'os.time at 5y',
    'os_evnt_5y': 'os.evnt at 5y',
    'efs_time_5y': 'efs.time at 5y',
    'efs_evnt_5y': 'efs.evnt at 5y',
}
forest_endpoints = (('os_time_5y', 'os_evnt_5y'), ('efs_time_5y', 'efs_evnt_5y'))

for trial, dataset in (('Discovery', df_px2), ('Validation', df_test)):
    # Work on a copy so the cohort frames themselves stay untouched.
    df_ = dataset.copy()

    # Dichotomise the blast percentage at 50.
    # NOTE(review): with pd.cut's defaults the bins are (0, 50] and (50, 100],
    # so a value of exactly 0 falls outside both and becomes NaN -- confirm
    # that this is intended.
    df_['BM leukemic blasts (%)'] = pd.cut(df_['BM leukemic blasts (%)'],
                                           bins=[0, 50, 100],
                                           labels=['≤50', '>50'])

    for alias, source in forest_aliases.items():
        df_[alias] = df_[source]

    for time_col, event_col in forest_endpoints:
        draw_forest_plot_withBMblast(time=time_col,
                                     event=event_col,
                                     df=df_,
                                     trialname=trial,
                                     model_name='AML_Epigenomic_Risk',
                                     save_plot=False)
Hide code cell output
../_images/bd20205cbfee80942d874c431510b794d6c6d8fa3e1e165b582607fc1fdc66d1.png ../_images/217b7f06bbd652f533b52323527a8c741abad5da9e8b2f9d61993fda62aad865.png ../_images/4633ab017239698ebced0935dc96c5fc44a3c7f4c26f952408992018977a7ebb.png ../_images/4feb393ee82d8b5edfa729a736c6d8c7aab5d0496d13d865c9ac7fb4578588a4.png

38CpG-AMLsignature-37CpGs

Hide code cell source
# Forest plots for the 38CpG signature call: per cohort, one plot for overall
# survival and one for event-free survival at 5 years.
#
# The plotting frame is rebuilt from `dataset` on every iteration. Reusing a
# `df_` left behind by an earlier cell would plot that cell's last cohort
# under both the 'Discovery' and the 'Validation' title.
for dataset, trial in zip([df_px2, df_test], ['Discovery', 'Validation']):

    # Work on a copy so the cohort frames themselves stay untouched.
    df_ = dataset.copy()

    # Dichotomise the blast percentage at 50 (bins (0, 50] and (50, 100]).
    df_['BM leukemic blasts (%)'] = pd.cut(df_['BM leukemic blasts (%)'], bins=[0, 50, 100], labels=['≤50', '>50'])

    # Aliases without spaces or dots for the columns handed to the plotting
    # helper (presumably it needs identifier-like names -- TODO confirm).
    df_['AML_Epigenomic_Risk'] = df_['AML Epigenomic Risk']
    df_['MethylScoreAML_Categorical'] = df_['38CpG-AMLsignature Categorical']
    df_['os_time_5y'] = df_['os.time at 5y']
    df_['os_evnt_5y'] = df_['os.evnt at 5y']
    df_['efs_time_5y'] = df_['efs.time at 5y']
    df_['efs_evnt_5y'] = df_['efs.evnt at 5y']

    draw_forest_plot_withBMblast(time='os_time_5y',
                                 event='os_evnt_5y',
                                 df=df_,
                                 trialname=trial,
                                 model_name='MethylScoreAML_Categorical',
                                 save_plot=False)

    draw_forest_plot_withBMblast(time='efs_time_5y',
                                 event='efs_evnt_5y',
                                 df=df_,
                                 trialname=trial,
                                 model_name='MethylScoreAML_Categorical',
                                 save_plot=False)
Hide code cell output
../_images/09112a7b813b99ab5a421c273cda8d54b24801c39172e8071a372d6ea70d4ed0.png ../_images/d806dc412382f76ee5971e5c7b85ccd4c552ae0f6406b4e2f314ad4ce6118401.png ../_images/c9e234bbfc39370da85caae7e46cab5bf61e3812dca5979f66db6500028c2533.png ../_images/885a513e3c4f4e2b2e99852eae221f7788f019b94e0721297a341743400c440a.png

ROC AUC performance#

Diagnostic Model#

Hide code cell source
def process_dataset_for_multiclass_auc(df):
    """Assemble the inputs for a multiclass ROC AUC plot.

    One-hot encodes the 'WHO 2022 Diagnosis' column of `df` into integer
    indicator columns and places them to the right of the positional column
    slice `df.iloc[:, -34:-6]`.

    NOTE(review): the slice is purely positional; it presumably picks the
    per-class prediction columns of the results table -- confirm against the
    column layout of the input files.

    Returns
    -------
    tuple
        `(combined, indicators)`: the sliced columns joined with the
        indicator columns, and the indicator columns on their own.
    """
    diagnosis_indicators = pd.get_dummies(df['WHO 2022 Diagnosis']).astype(int)
    sliced_columns = df.iloc[:, -34:-6]
    combined = pd.concat([sliced_columns, diagnosis_indicators], axis=1)
    return combined, diagnosis_indicators

# Prepare the ROC inputs for the three cohorts, in plotting order.
roc_inputs = [process_dataset_for_multiclass_auc(cohort)
              for cohort in (df_dx, df_px2, df_test)]
((df_dx_auc_train, df_dx_dummies_train),
 (df_dx_auc_cog, df_dx_dummies_cog),
 (df_dx_auc_test, df_dx_dummies_test)) = roc_inputs

# One multiclass ROC panel per cohort.
panel_titles = ('Discovery', 'Discovery COG peds AML', 'Validation')
p1, p2, p3 = [
    plot_multiclass_roc_auc(scores, indicators.columns, title=panel_title)
    for (scores, indicators), panel_title in zip(roc_inputs, panel_titles)
]

# Lay the panels out side by side in a single row and display them.
p = gridplot([[p1, p2, p3]], toolbar_location='above')
show(p)
Hide code cell output

Prognostic models#

Discovery#

Hide code cell source
# Outcome at 5 years next to each model's categorical call (df_cat) and
# continuous score (df_cont), discovery cohort.
df_cat = df_px2[['os.evnt at 5y', '38CpG-AMLsignature Categorical', 'AML Epigenomic Risk']]
df_cont = df_px2[['os.evnt at 5y', '38CpG-AMLsignature', 'P(Death) at 5y']]

# Both frames get the same display names so the two panels share a legend
# vocabulary.
df_cont = df_cont.rename(columns={'P(Death) at 5y': 'AML Epigenomic Risk (PaCMAP-LGBM)',
                                  '38CpG-AMLsignature': '38CpG-AMLsignature (EWAS-CoxPH)'})

df_cat = df_cat.rename(columns={'AML Epigenomic Risk': 'AML Epigenomic Risk (PaCMAP-LGBM)',
                                '38CpG-AMLsignature Categorical': '38CpG-AMLsignature (EWAS-CoxPH)'})

# Clinical risk groups. .copy() makes this an independent frame, so the
# column assignments below do not write into a selection of df_px2 (which
# can raise SettingWithCopyWarning).
risk = df_px2[['Risk Group AAML1831', 'Risk Group']].copy()

# Numeric encoding of the risk categories; values absent from this mapping
# become NaN.
low_high_dict = {'Low': 0, 'Low Risk': 0,
                 'Standard': 0.5, 'Standard Risk': 0.5,
                 'High': 1, 'High Risk': 1}

risk['Risk Group'] = risk['Risk Group'].map(low_high_dict)
risk['Risk Group AAML1831'] = risk['Risk Group AAML1831'].map(low_high_dict)

df_cat['AML Epigenomic Risk (PaCMAP-LGBM)'] = df_cat['AML Epigenomic Risk (PaCMAP-LGBM)'].map(low_high_dict)
df_cat['38CpG-AMLsignature (EWAS-CoxPH)'] = df_cat['38CpG-AMLsignature (EWAS-CoxPH)'].map(low_high_dict)

df_cont_risk = df_cont.join(risk)
df_cat_risk = df_cat.join(risk)

# Every remaining missing value -- in any column, outcome and continuous
# scores included -- is set to 0.5, the code used for 'Standard'.
# NOTE(review): the validation cell does not impute; confirm this is intended.
df_cont_risk = df_cont_risk.fillna(0.5)
df_cat_risk = df_cat_risk.fillna(0.5)

p1 = plot_roc_auc(df_cont_risk, 'os.evnt at 5y', title='Continuous (prob. of death at 5y)')
p2 = plot_roc_auc(df_cat_risk, 'os.evnt at 5y', title='Categorical (high-low risk)')

# Lay the two panels out side by side and display them.
p = gridplot([[p1, p2]], toolbar_location='above')

show(p)
Hide code cell output

Validation#

Hide code cell source
# Outcome at 5 years next to each model's categorical call (df_cat) and
# continuous score (df_cont), validation cohort.
df_cat = df_test[['os.evnt at 5y', 'AML Epigenomic Risk', '38CpG-AMLsignature Categorical']]
df_cont = df_test[['os.evnt at 5y', 'P(Death) at 5y', '38CpG-AMLsignature']]

df_cont = df_cont.rename(columns={'P(Death) at 5y': 'AML Epigenomic Risk (PaCMAP-LGBM)',
                                  '38CpG-AMLsignature': '38CpG-AMLsignature (EWAS-CoxPH)'})

df_cat = df_cat.rename(columns={'AML Epigenomic Risk': 'AML Epigenomic Risk (PaCMAP-LGBM)',
                                '38CpG-AMLsignature Categorical': '38CpG-AMLsignature (EWAS-CoxPH)'})

# Clinical risk group. .copy() makes this an independent frame, so the
# column assignment below does not write into a selection of df_test (which
# can raise SettingWithCopyWarning). `low_high_dict` is the category -> number
# mapping defined in the Discovery cell.
risk = df_test[['Risk Group']].copy()
risk['Risk Group'] = risk['Risk Group'].map(low_high_dict)

df_cat['AML Epigenomic Risk (PaCMAP-LGBM)'] = df_cat['AML Epigenomic Risk (PaCMAP-LGBM)'].map(low_high_dict)
df_cat['38CpG-AMLsignature (EWAS-CoxPH)'] = df_cat['38CpG-AMLsignature (EWAS-CoxPH)'].map(low_high_dict)

df_cont_risk_test = df_cont.join(risk)
df_cat_risk_test = df_cat.join(risk)

# Rename `Risk Group` to `Risk Group AML02-08`
df_cont_risk_test = df_cont_risk_test.rename(columns={'Risk Group': 'Risk Group AML02-08'})
df_cat_risk_test = df_cat_risk_test.rename(columns={'Risk Group': 'Risk Group AML02-08'})

p1 = plot_roc_auc(df_cont_risk_test, 'os.evnt at 5y', title='Continuous (prob. of death at 5y)')
p2 = plot_roc_auc(df_cat_risk_test, 'os.evnt at 5y', title='Categorical (high-low risk)')

# Lay the two panels out side by side and display them.
p = gridplot([[p1, p2]], toolbar_location='above')

show(p)
Hide code cell output

Pearson Correlation#

Discovery#

Hide code cell source
# Scatter plot of the two continuous model scores in the discovery frame.
draw_scatter_pearson(df=df_cont_risk,x='38CpG-AMLsignature (EWAS-CoxPH)', y='AML Epigenomic Risk (PaCMAP-LGBM)',s=20)

# Pairwise correlation matrix of every column except the first one, rounded
# to two decimals (presumably the skipped column is the outcome
# 'os.evnt at 5y' -- verify the column order of df_cont_risk).
df_cont_risk.iloc[:,1:].corr().round(2)
Hide code cell output
../_images/ef162dbdf8f7c3e0ad1d00597fc2fc5766e8130f29e2478a5404d5647b6872fc.png
38CpG-AMLsignature (EWAS-CoxPH) AML Epigenomic Risk (PaCMAP-LGBM) Risk Group AAML1831 Risk Group
38CpG-AMLsignature (EWAS-CoxPH) 1.00 0.74 0.50 0.54
AML Epigenomic Risk (PaCMAP-LGBM) 0.74 1.00 0.53 0.59
Risk Group AAML1831 0.50 0.53 1.00 0.62
Risk Group 0.54 0.59 0.62 1.00

Validation#

Hide code cell source
# Scatter plot of the two continuous model scores in the validation frame.
draw_scatter_pearson(df=df_cont_risk_test,x='38CpG-AMLsignature (EWAS-CoxPH)', y='AML Epigenomic Risk (PaCMAP-LGBM)',s=20)

# Pairwise correlation matrix of every column except the first one, rounded
# to two decimals (presumably the skipped column is the outcome
# 'os.evnt at 5y' -- verify the column order of df_cont_risk_test).
df_cont_risk_test.iloc[:,1:].corr().round(2)
Hide code cell output
../_images/1e42e6fdcd2bdc36dbebe9d10231686f4866d1fec3595f5ee9271d557125be2d.png
AML Epigenomic Risk (PaCMAP-LGBM) 38CpG-AMLsignature (EWAS-CoxPH) Risk Group AML02-08
AML Epigenomic Risk (PaCMAP-LGBM) 1.00 0.69 0.51
38CpG-AMLsignature (EWAS-CoxPH) 0.69 1.00 0.46
Risk Group AML02-08 0.51 0.46 1.00

Sankey plots#

Note

Sankey plots below compare the distribution of categories. The width of the lines is proportional to the number of patients in each group.

Samples with annotated diagnosis info#

Hide code cell source
# Shared colour palette for the Sankey plots.
colors = get_custom_color_palette()

# 'WHO 2022 Diagnosis' versus 'AL Epigenomic Subtype', one plot per cohort.
# nan_action='drop' presumably leaves out rows with a missing value --
# confirm against draw_sankey_plot.
sankey_panels = (
    (df_train, 'Discovery cohort', (4, 11)),
    (df_px2, 'Discovery cohort (COG peds AML Dx samples only)', (4, 10)),
    (df_test, 'Validation cohort', (3, 7)),
)
for cohort_df, panel_title, panel_size in sankey_panels:
    draw_sankey_plot(cohort_df, 'WHO 2022 Diagnosis', 'AL Epigenomic Subtype', colors,
                     title=panel_title, fig_size=panel_size,
                     fontsize=8, nan_action='drop')
Hide code cell output
../_images/0182abd1fccce025c9dc67f6fc22c552c8e3f0ba7a3d7c0ac195be824a4fd18c.png ../_images/41d83ede5289e183af8564c466bd928be2f28220222ba8790d4a41148df16f10.png ../_images/1859998707b1db7b0058d6aaeef3d614c9e8d078569fb912f028efb13ebde2c0.png

Predictions in samples for which no WHO 22 Dx data was available#

Hide code cell source
# 'WHO 2022 Diagnosis' versus 'AL Epigenomic Subtype', one plot per cohort.
# nan_action='keep only' presumably restricts each plot to rows with a
# missing value -- confirm against draw_sankey_plot.
sankey_panels = (
    (df_train, 'Discovery cohort', (4, 9)),
    (df_px2, 'Discovery cohort (COG peds AML Dx samples only)', (4, 8)),
    (df_test, 'Validation cohort', (4, 8)),
)
for cohort_df, panel_title, panel_size in sankey_panels:
    draw_sankey_plot(cohort_df, 'WHO 2022 Diagnosis', 'AL Epigenomic Subtype', colors,
                     title=panel_title, fig_size=panel_size,
                     fontsize=8, nan_action='keep only')
Hide code cell output
../_images/9a315cd3038ce65abd97ca4c326d32fbdc9b17bb9796d5af1fc199677c7f1e50.png ../_images/b03b5fc2ea367db68d923a02837e34ccd6836be88115963f69dd103f35e3d594.png ../_images/e59ebe224e4f5002b72b2b397c33fd5da25adc635284bf551ff6c2fb95bd724b.png

Reason for unclassified samples#

Hide code cell source
# 'WHO 2022 Diagnosis' versus a cytogenetic / fusion column, one plot per
# cohort. The COG discovery subset is compared against 'Gene Fusion'; the
# other two cohorts against 'Primary Cytogenetic Code'.
# nan_action='keep only' presumably restricts each plot to rows with a
# missing value -- confirm against draw_sankey_plot.
sankey_panels = (
    (df_train, 'Primary Cytogenetic Code', 'Discovery cohort', (4, 6)),
    (df_px2, 'Gene Fusion', 'Discovery cohort (COG peds AML Dx samples only)', (4, 9)),
    (df_test, 'Primary Cytogenetic Code', 'Validation cohort', (2, 3)),
)
for cohort_df, right_column, panel_title, panel_size in sankey_panels:
    draw_sankey_plot(cohort_df, 'WHO 2022 Diagnosis', right_column, colors,
                     title=panel_title, fig_size=panel_size,
                     fontsize=8, nan_action='keep only')
Hide code cell output
../_images/5547d743061f53c663c372595271c713969e8ae8a83e1c8ccd272ec49983be08.png ../_images/ac9587c333e09cbe5df727ef44f52e94138363f65f23a3ed27619bdf5b9564d6.png ../_images/bd8c1a0ba9afc90b3e366dce02a1dd5f666d80160be98e463349a8cec8bbef14.png

Risk group comparison in COG#

Hide code cell source
# Two comparisons within the COG discovery subset: 'Risk Group' versus
# 'Risk Group AAML1831', then 'Risk Group AAML1831' versus
# 'AML Epigenomic Risk'.
column_pairs = (
    ('Risk Group', 'Risk Group AAML1831'),
    ('Risk Group AAML1831', 'AML Epigenomic Risk'),
)
for left_column, right_column in column_pairs:
    draw_sankey_plot(df_px2, left_column, right_column, colors,
                     title='Discovery cohort (COG peds AML Dx samples only)',
                     fig_size=(2, 4),
                     fontsize=8, nan_action='drop')
Hide code cell output
../_images/d848ec89f21d0877eb296391dacffef0cbed15dd349789484a91659d40040ad0.png ../_images/fdb6dfd0e36756be0859d24c790130645df52c314739df4cb656c7eef3765118.png

Px and Dx model comparison#

Hide code cell source
# Prognostic vs. diagnostic model output: 'AML Epigenomic Risk' on the left,
# 'AL Epigenomic Subtype' on the right, drawn once per cohort.
px_dx_sankey_opts = dict(fontsize=8, nan_action='drop')

draw_sankey_plot(
    df_train, 'AML Epigenomic Risk', 'AL Epigenomic Subtype', colors,
    title='Discovery cohort', fig_size=(3, 10),
    **px_dx_sankey_opts,
)

draw_sankey_plot(
    df_px2, 'AML Epigenomic Risk', 'AL Epigenomic Subtype', colors,
    title='Discovery cohort (COG peds AML Dx samples only)', fig_size=(3, 10),
    **px_dx_sankey_opts,
)

# The validation figure is slightly shorter than the discovery ones.
draw_sankey_plot(
    df_test, 'AML Epigenomic Risk', 'AL Epigenomic Subtype', colors,
    title='Validation cohort', fig_size=(3, 8),
    **px_dx_sankey_opts,
)
Hide code cell output
../_images/e96c57579d9a14514b5a77d733df8a228615a90ab44c69ea55b777d294a750ea.png ../_images/83a0662ed8efc4d736a84a81d326d2b14ba3656de6534b9edc8943856662ae0b.png ../_images/e8ec9b942c98b3ce44a0d9e311227dfc9fe327f6c80ead0c9f21783104d9bcbc.png

Nanopore test results#

Hide code cell source
# Specimen-to-result cohort: all rows whose 'Clinical Trial' is 'UF HemBank',
# with the original index moved into a regular 'index' column.
dfsank = df[df['Clinical Trial'] == 'UF HemBank'].reset_index()

# Blank out repeated 'Dx at Acquisition' values so that each distinct diagnosis
# text is kept on one row only: duplicated(keep='last') flags every occurrence
# except the last, and mask() replaces the flagged entries with ''.
repeated_dx = dfsank['Dx at Acquisition'].duplicated(keep='last')
dfsank['Dx at Acquisition'] = dfsank['Dx at Acquisition'].mask(repeated_dx, '')

# Left-hand node label: diagnosis text, two spaces, then the former index
# value (presumably the sample identifier — confirm against the input sheet).
dfsank['Diagnosis --> Patient Sample'] = (
    dfsank['Dx at Acquisition'] + '  ' + dfsank['index']
)

draw_sankey_plot(
    dfsank, 'Diagnosis --> Patient Sample', 'AL Epigenomic Subtype',
    title='Specimen-to-result Test Cohort', fig_size=(6, 8),
    fontsize=11, nan_action='drop', colors=colors,
)
Hide code cell output
../_images/e9d1a88a7271e27b82830d92068bdd9e30b6200f4bd0ad2b3b809155921cb3f8.png

Performance metrics#

AML Epigenomic Risk#

Hide code cell source
# Stacked confusion matrices for the AML Epigenomic Risk model, using the
# COG-only discovery subset (df_px2) and the validation cohort (df_test).
# NOTE(review): the roles of the three column arguments (reference column,
# prediction column, and the repeated 'os.evnt at 5y') are inferred from their
# names — confirm against plot_confusion_matrix_stacked in utils.plots.
plot_confusion_matrix_stacked(
    df_px2,
    df_test,
    'os.evnt at 5y',
    'AML Epigenomic Risk_int',
    'os.evnt at 5y',
)
Hide code cell output
../_images/cb95bad2d5c77f2578653979b1c657e26f7abc7ad9feb952b15c2fdd217174e4.png
Metrics:
|            |   Accuracy |   Sensitivity |   Specificity |   Precision |   F1-score |   AUC-ROC |
|:-----------|-----------:|--------------:|--------------:|------------:|-----------:|----------:|
| Train      |      0.704 |         0.74  |         0.684 |       0.566 |      0.641 |     0.712 |
| Validation |      0.7   |         0.733 |         0.686 |       0.5   |      0.595 |     0.71  |

38CpG-AMLsignature#

Hide code cell source
# Stacked confusion matrices for the binarized 38CpG-AMLsignature, using the
# COG-only discovery subset (df_px2) and the validation cohort (df_test).
# NOTE(review): the roles of the three column arguments are inferred from
# their names — confirm against plot_confusion_matrix_stacked in utils.plots.
plot_confusion_matrix_stacked(
    df_px2,
    df_test,
    'os.evnt at 5y',
    '38CpG-AMLsignature_cat_bin',
    'os.evnt at 5y',
)
Hide code cell output
../_images/5d6b86047600370dc3bceb25b8d386d9386db9cad86456ddb5bffc5294850129.png
Metrics:
|            |   Accuracy |   Sensitivity |   Specificity |   Precision |   F1-score |   AUC-ROC |
|:-----------|-----------:|--------------:|--------------:|------------:|-----------:|----------:|
| Train      |      0.675 |         0.746 |         0.637 |       0.533 |      0.621 |     0.691 |
| Validation |      0.615 |         0.783 |         0.543 |       0.423 |      0.55  |     0.663 |

AL Epigenomic Subtype#

Hide code cell source
# Stacked confusion matrices for the AL Epigenomic Subtype classifier against
# the 'WHO 2022 Diagnosis' labels, on a larger canvas than the binary models.
# NOTE(review): df_dx has rows with a missing WHO 2022 diagnosis and the
# 'AML with t(9;22); BCR::ABL1' class removed, whereas df_test is passed
# without that filtering — verify that this asymmetry is intended.
plot_confusion_matrix_stacked(
    df_dx,
    df_test,
    'WHO 2022 Diagnosis',
    'AL Epigenomic Subtype',
    'WHO 2022 Diagnosis',
    figsize=(22, 14),
)
Hide code cell output
../_images/39646278c37bfee69dc0d331d117d0f2835d717070ae4f9b148ed5af96ea2d59.png
Metrics:
|            |   Accuracy |   Macro F1 |   Weighted F1 |   Cohen's Kappa |
|:-----------|-----------:|-----------:|--------------:|----------------:|
| Train      |      0.963 |      0.948 |         0.963 |           0.96  |
| Validation |      0.901 |      0.46  |         0.94  |           0.859 |

Box plots#

AML Epigenomic Risk#

Hide code cell source
# Box plots of 'P(Death) at 5y' in the validation cohort, one per clinical
# grouping column, all using hue=model_name. Arguments common to the three
# plots are gathered in a single dict.
p_death_box_opts = dict(
    df=df_test,
    y='P(Death) at 5y',
    trialname='Validation',
    hue=model_name,
    save_plot=False,
    figsize=(4, 4),
)

draw_boxplot(x='Risk Group',
             order=['High Risk', 'Standard Risk', 'Low Risk'],
             **p_death_box_opts)

draw_boxplot(x='MRD 1 Status',
             order=['Positive', 'Negative'],
             **p_death_box_opts)

# order='auto' leaves the category ordering to draw_boxplot.
draw_boxplot(x='Primary Cytogenetic Code',
             order='auto',
             **p_death_box_opts)
Hide code cell output
../_images/028dffa11ad606703b1370dbfb75bd3a80eef5634c7dda0c1fea21837a397ab0.png ../_images/2867c8a5657d5762e4f3b03cd524314256d6f36ad1652b968416c82df35ca70a.png ../_images/10e17ee42baedacbf16fe2cfaa06d857b5b52064722b46594f389939df7c522a.png

38CpG-AMLsignature#

Hide code cell source
# Box plots of the continuous '38CpG-AMLsignature' score in the validation
# cohort, one per clinical grouping column.
# NOTE(review): hue uses whatever `model_name` currently holds; when the
# notebook runs top to bottom that is presumably still 'AML Epigenomic Risk',
# but re-running this cell after the stacked-bar cells would change it —
# confirm which hue column is intended here.
signature_box_opts = dict(
    df=df_test,
    y='38CpG-AMLsignature',
    trialname='Validation',
    hue=model_name,
    save_plot=False,
    figsize=(4, 4),
)

draw_boxplot(x='Risk Group',
             order=['High Risk', 'Standard Risk', 'Low Risk'],
             **signature_box_opts)

draw_boxplot(x='MRD 1 Status',
             order=['Positive', 'Negative'],
             **signature_box_opts)

# order='auto' leaves the category ordering to draw_boxplot.
draw_boxplot(x='Primary Cytogenetic Code',
             order='auto',
             **signature_box_opts)
Hide code cell output
../_images/43869c04be942d3a1b04250a5601dba1837c50c604666c8f81ade98696008122.png ../_images/de26861f070ccb1c221c9fc07a1453d633b2b83117f8cc04a9929d7a9dd29153.png ../_images/083970f7f0050f066ea7f1b39873c04193eb74bd846beff0bd1782bd702d0395.png

Stacked bar plots#

AML Epigenomic Risk#

Hide code cell source
# Stacked bar plots of the 'AML Epigenomic Risk' category in the validation
# cohort; the same column is passed as both `y` and `hue`.
model_name = 'AML Epigenomic Risk'
risk_bar_opts = dict(
    df=df_test,
    y=model_name,
    hue=model_name,
    trialname='Validation',
    save_plot=False,
    figsize=(4, 3),
)

# No fontsize is passed here, so draw_stacked_barplot uses its own default.
draw_stacked_barplot(x='MRD 1 Status',
                     order=['Positive', 'Negative'],
                     **risk_bar_opts)

draw_stacked_barplot(x='Risk Group',
                     order=['High Risk', 'Standard Risk', 'Low Risk'],
                     fontsize=9,
                     **risk_bar_opts)

draw_stacked_barplot(x='Primary Cytogenetic Code',
                     order='auto',
                     fontsize=6,
                     **risk_bar_opts)
Hide code cell output
../_images/0d4b26fed2754c808f6030a994c2e730791c282f6b2c9bd3d2ff1b5a85704268.png ../_images/3b2dfae756629e82c854e1144fdf60ce2b7e0deb321ca06c4bd8e13bcc6096b6.png ../_images/540a22ca89d25a13378fb9fc8268bbd5696d714cb9426145ff4fd94f19a73b7a.png

38CpG-AMLsignature#

Hide code cell source
# Stacked bar plots of the '38CpG-AMLsignature Categorical' column in the
# validation cohort; the same column is passed as both `y` and `hue`.
# Rebinding `model_name` here also affects any cell executed afterwards.
model_name = '38CpG-AMLsignature Categorical'
signature_bar_opts = dict(
    df=df_test,
    y=model_name,
    hue=model_name,
    trialname='Validation',
    save_plot=False,
    figsize=(4, 3),
)

# No fontsize is passed here, so draw_stacked_barplot uses its own default.
draw_stacked_barplot(x='MRD 1 Status',
                     order=['Positive', 'Negative'],
                     **signature_bar_opts)

draw_stacked_barplot(x='Risk Group',
                     order=['High Risk', 'Standard Risk', 'Low Risk'],
                     fontsize=9,
                     **signature_bar_opts)

draw_stacked_barplot(x='Primary Cytogenetic Code',
                     order='auto',
                     fontsize=6,
                     **signature_bar_opts)
Hide code cell output
../_images/4e9c7b845bc76d234407ee4de3de4903bfb043c6ab57e41f7cd7f38b43ce7845.png ../_images/cc4b5e66e9f6d7641418030d51547fb03638cfbbfeedb4bd6047dc2dc78fe90f.png ../_images/0f3a1e0e2a9efb031a35eec943ca9a49410e0cd363d4c5413950b87d771a2061.png

Watermark#

Author: Francisco_Marchi@Lamba_Lab_UF

Last updated: 2024-09-18

Python implementation: CPython
Python version       : 3.10.13
IPython version      : 8.27.0

pandas    : 2.2.2
seaborn   : 0.13.2
matplotlib: 3.9.2
tableone  : 0.8.0
sklearn   : 1.5.2
lifelines : 0.28.0
scipy     : 1.12.0

Compiler    : GCC 11.4.0
OS          : Linux
Release     : 5.15.133.1-microsoft-standard-WSL2
Machine     : x86_64
Processor   : x86_64
CPU cores   : 32
Architecture: 64bit

Git repo: git@github.com:f-marchi/ALMA.git